load libraries

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

load data

years <- 2017:2021
quarters <- 1:4
types <- c("Electric","Gas")

pge_data_raw <- NULL

for(year in years) {
  for(quarter in quarters) {
    for(type in types) {
      
      filename <-
        paste0(
          "pge_data/PGE_",
          year,
          "_Q",
          quarter,
          "_",
          type,
          "UsageByZip.csv"
        )

      # print(filename)
        
      # for Q's that don't exist
      if(!file.exists(filename)) next
      
      temp <- read_csv(filename)
      
      
      # converting to standardize units
      
      if(type == "Electric") {
        temp <-
          temp %>%
          mutate(TOTALKBTU = TOTALKWH * 3.412) %>%
          select(-AVERAGEKWH,-TOTALKWH)
      }
      
      if(type == "Gas") {
        temp <-
          temp %>%
          mutate(TOTALKBTU = TOTALTHM * 99.976) %>%
          select(-AVERAGETHM,-TOTALTHM)
      }

      pge_data_raw <-
        rbind(pge_data_raw, temp)
      
    }
  }
}
## Rows: 7776 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3486 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7819 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3489 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7842 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3491 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (4): ZIPCODE, MONTH, YEAR, AVERAGETHM
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 10455 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4662 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7852 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3499 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7880 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3502 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (4): ZIPCODE, MONTH, YEAR, AVERAGETHM
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7877 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3503 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (4): ZIPCODE, MONTH, YEAR, AVERAGETHM
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7875 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3496 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7870 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3499 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7887 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3499 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7891 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3489 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7874 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3490 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7865 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3493 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7791 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3461 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7784 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3459 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (4): ZIPCODE, MONTH, YEAR, AVERAGETHM
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7771 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3456 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (4): ZIPCODE, MONTH, YEAR, AVERAGETHM
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7580 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3458 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7611 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3459 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

manipulating data

# make two data objects for plotting later
# one for resdiential, one for commercial
# pipeline functions: filter(), select(), mutate(), group_by(), summarise()
# residential data prep via pipes
# 
# classes <- c("Residential","Commercial")
# 
# for(class in classes) {
  pge_data_plottable <-
    pge_data_raw %>% 
    filter(
      CUSTOMERCLASS %in%
        c(
          "Elec- Commercial",
          "Elec- Residential",
          "Gas- Commercial",
          "Gas- Residential"
        )
    ) %>% 
    select(
      MONTH, YEAR, CUSTOMERCLASS, TOTALKBTU
    ) %>% 
    mutate(
      MONTH_INDEX = 12 * (YEAR %% 2017) + MONTH
    ) %>% 
    group_by(
      MONTH_INDEX, CUSTOMERCLASS
    ) %>% 
    summarise(
      TOTALKBTU = sum(TOTALKBTU, na.rm = T)
    )
## `summarise()` has grouped output by 'MONTH_INDEX'. You can override using the `.groups` argument.
#left off from textbook 1.7 3/4 down, "You'll notice that.."

plotting

# plot the RESIDENTIAL data
res_plot <-
  pge_data_plottable %>% 
  filter(
    CUSTOMERCLASS %in% c(
      "Elec- Residential",
      "Gas- Residential"
    )
  ) %>% 
  ggplot() +
  geom_bar(
    aes(
      x = MONTH_INDEX,
      y = TOTALKBTU,
      fill = CUSTOMERCLASS
    ),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    x = "Month",
    y = "kBtu",
    title = "Residential Energy Use 2017-2021",
    fill = "Energy Type"
  )

res_plot %>% ggplotly()
# plot the COMMERCIAL data
com_plot <-
  pge_data_plottable %>% 
  filter(
    CUSTOMERCLASS %in% c(
      "Elec- Commercial",
      "Gas- Commercial"
    )
  ) %>% 
  ggplot() +
  geom_bar(
    aes(
      x = MONTH_INDEX,
      y = TOTALKBTU,
      fill = CUSTOMERCLASS
    ),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    x = "Month",
    y = "kBtu",
    title = "Commercial Energy Use 2017-2021",
    fill = "Energy Type"
  )

com_plot %>% ggplotly()

commentary

2. Comment on any observable changes in energy consumption that may be attributable to the COVID-19 pandemic 
    a. (you are encouraged to create additional plots that help emphasize the change between 2019 and 2020). 
    b. 2019 = months 25 - 36
    c. 2020 = months 37 - 48
    d. Residential
        i. No glaring / obvious change from 2019 to 2020 energy use
    e. Commercial
        i. Noticeable dip in 2020 April, May, June (first three months of shelter in place, economy shut down)
    f. Side note: what is that huge spike in September 2017?! It occurs in both residential and commercial, mostly in electricity use but also notably in gas use.
        i. Thomas fire?
3. Explain any key assumptions you made in the analysis, or caveats about the data sources that you think the reader should be aware of.
    a. Assumptions / Caveats:
        i. Assumptions: 
            1) assumed accuracy and thoroughness of data
            2) Didn't double check for overlapping zipcodes
        ii. Caveats:
            1) PG&E potentially not a neutral source for data
            2) i.e. might have tension considering political / reputation issues 

next steps: